Description:
In some cases the parser collects string literals first and defers the encoding conversion operations -- this is a result of WL#7200's refactoring.
During those deferred operations the parser references a 7bit-ness flag that is cached by the lexical scanner:
PTI_text_literal_text_string::itemize():
uint repertoire= thd->m_parser_state->m_lip.text_string_is_7bit() &&
my_charset_is_ascii_based(cs_cli) ?
However, m_lip.text_string_is_7bit() returns the 7bit status of the last processed text literal -- not the status of this->literal.
Thus, if an SQL statement contains a sequence of text literals where some literals require encoding conversion but the last literal in the sequence is a pure 7bit string, then PTI_text_literal_text_string::itemize() may skip the conversion for the literals that need it.
How to repeat:
CREATE DATABASE MYSQLTEST1 CHARACTER SET LATIN2;
USE MYSQLTEST1;
CREATE TABLE t1 (a VARCHAR(255) CHARACTER SET LATIN2);
SET CHARACTER SET cp1250_latin2;
INSERT INTO t1 VALUES ('£¥ª¯');
INSERT INTO t1 VALUES ('£¥ª¯' '');
SELECT HEX(a) FROM t1;
DROP DATABASE MYSQLTEST1;
The SELECT statement returns:
HEX(a)
A3A1AAAF
A3A5AAAF
This output is incorrect; both rows must contain the same value:
HEX(a)
A3A1AAAF
A3A1AAAF
Suggested fix:
Move the m_lip.text_string_is_7bit() call out of the deferred itemize() functions and back into the parser grammar, so that the flag is captured for each literal at the time it is parsed:
diff --git a/sql/parse_tree_items.h b/sql/parse_tree_items.h
--- a/sql/parse_tree_items.h
+++ b/sql/parse_tree_items.h
@@ -489,10 +489,13 @@ class PTI_text_literal : public Item_string
typedef Item_string super;
protected:
+ bool is_7bit;
LEX_STRING literal;
- PTI_text_literal(const POS &pos, const LEX_STRING &literal_arg)
- : super(pos), literal(literal_arg)
+ PTI_text_literal(const POS &pos,
+ bool is_7bit_arg,
+ const LEX_STRING &literal_arg)
+ : super(pos), is_7bit(is_7bit_arg), literal(literal_arg)
{}
};
@@ -502,8 +505,10 @@ class PTI_text_literal_text_string : public PTI_text_literal
typedef PTI_text_literal super;
public:
- PTI_text_literal_text_string(const POS &pos, const LEX_STRING &literal)
- : super(pos, literal)
+ PTI_text_literal_text_string(const POS &pos,
+ bool is_7bit_arg,
+ const LEX_STRING &literal)
+ : super(pos, is_7bit_arg, literal)
{}
virtual bool itemize(Parse_context *pc, Item **res)
@@ -515,8 +520,7 @@ public:
LEX_STRING tmp;
const CHARSET_INFO *cs_con= thd->variables.collation_connection;
const CHARSET_INFO *cs_cli= thd->variables.character_set_client;
- uint repertoire= thd->m_parser_state->m_lip.text_string_is_7bit() &&
- my_charset_is_ascii_based(cs_cli) ?
+ uint repertoire= is_7bit && my_charset_is_ascii_based(cs_cli) ?
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
if (thd->charset_is_collation_connection ||
(repertoire == MY_REPERTOIRE_ASCII &&
@@ -539,8 +543,10 @@ class PTI_text_literal_nchar_string : public PTI_text_literal
typedef PTI_text_literal super;
public:
- PTI_text_literal_nchar_string(const POS &pos, const LEX_STRING &literal)
- : super(pos, literal)
+ PTI_text_literal_nchar_string(const POS &pos,
+ bool is_7bit_arg,
+ const LEX_STRING &literal)
+ : super(pos, is_7bit_arg, literal)
{}
virtual bool itemize(Parse_context *pc, Item **res)
@@ -548,8 +554,7 @@ public:
if (super::itemize(pc, res))
return true;
- uint repertoire= pc->thd->m_parser_state->m_lip.text_string_is_7bit() ?
- MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+ uint repertoire= is_7bit ? MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info));
init(literal.str, literal.length, national_charset_info,
DERIVATION_COERCIBLE, repertoire);
@@ -566,9 +571,10 @@ class PTI_text_literal_underscore_charset : public PTI_text_literal
public:
PTI_text_literal_underscore_charset(const POS &pos,
+ bool is_7bit_arg,
const CHARSET_INFO *cs_arg,
const LEX_STRING &literal)
- : super(pos, literal), cs(cs_arg)
+ : super(pos, is_7bit_arg, literal), cs(cs_arg)
{}
virtual bool itemize(Parse_context *pc, Item **res)
@@ -592,9 +598,9 @@ class PTI_text_literal_concat : public PTI_text_literal
PTI_text_literal *head;
public:
- PTI_text_literal_concat(const POS &pos,
+ PTI_text_literal_concat(const POS &pos, bool is_7bit_arg,
PTI_text_literal *head_arg, const LEX_STRING &tail)
- : super(pos, tail), head(head_arg)
+ : super(pos, is_7bit_arg, tail), head(head_arg)
{}
virtual bool itemize(Parse_context *pc, Item **res)
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -12399,19 +12399,23 @@ load_data_set_elem:
text_literal:
TEXT_STRING
{
- $$= NEW_PTN PTI_text_literal_text_string(@$, $1);
+ $$= NEW_PTN PTI_text_literal_text_string(@$,
+ YYTHD->m_parser_state->m_lip.text_string_is_7bit(), $1);
}
| NCHAR_STRING
{
- $$= NEW_PTN PTI_text_literal_nchar_string(@$, $1);
+ $$= NEW_PTN PTI_text_literal_nchar_string(@$,
+ YYTHD->m_parser_state->m_lip.text_string_is_7bit(), $1);
}
| UNDERSCORE_CHARSET TEXT_STRING
{
- $$= NEW_PTN PTI_text_literal_underscore_charset(@$, $1, $2);
+ $$= NEW_PTN PTI_text_literal_underscore_charset(@$,
+ YYTHD->m_parser_state->m_lip.text_string_is_7bit(), $1, $2);
}
| text_literal TEXT_STRING_literal
{
- $$= NEW_PTN PTI_text_literal_concat(@$, $1, $2);
+ $$= NEW_PTN PTI_text_literal_concat(@$,
+ YYTHD->m_parser_state->m_lip.text_string_is_7bit(), $1, $2);
}
;