From 6c4027b2038e3d6c8e55b124368e911ca4a39093 Mon Sep 17 00:00:00 2001 From: Vilnis Termanis Date: Thu, 14 Jun 2018 11:04:59 +0100 Subject: [PATCH] Fix decoding of binary collations for non-binary character sets - Previously relied on BINARY flag, which only indicates that the collation is binary, not whether the character set is - Use character set id to determine whether to treat field as binary. Expanded description tuple to include said id. - WARNING: Unit tests not updated --- lib/mysql/connector/connection_cext.py | 3 ++- lib/mysql/connector/conversion.py | 7 ++++--- lib/mysql/connector/protocol.py | 3 ++- src/include/mysql_capi_conversion.h | 2 +- src/mysql_capi.c | 9 +++++---- src/mysql_capi_conversion.c | 19 ++++++++++--------- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/lib/mysql/connector/connection_cext.py b/lib/mysql/connector/connection_cext.py index e0cd5fa..ff1256d 100644 --- a/lib/mysql/connector/connection_cext.py +++ b/lib/mysql/connector/connection_cext.py @@ -341,7 +341,8 @@ def fetch_eof_columns(self): None, None, ~int(col[9]) & FieldFlag.NOT_NULL, - int(col[9]) + int(col[9]), + int(col[6]) )) return { diff --git a/lib/mysql/connector/conversion.py b/lib/mysql/connector/conversion.py index 8a80d88..955f6e9 100644 --- a/lib/mysql/connector/conversion.py +++ b/lib/mysql/connector/conversion.py @@ -591,7 +591,7 @@ def _JSON_to_python(self, value, dsc=None): # pylint: disable=C0103 # Check if we deal with a SET if dsc[7] & FieldFlag.SET: return self._SET_to_python(value, dsc) - if dsc[7] & FieldFlag.BINARY: + if dsc[8] == 63: if self.charset != 'binary': try: return value.decode(self.charset) @@ -613,7 +613,8 @@ def _STRING_to_python(self, value, dsc=None): # pylint: disable=C0103 # Check if we deal with a SET if dsc[7] & FieldFlag.SET: return self._SET_to_python(value, dsc) - if dsc[7] & FieldFlag.BINARY: + # Binary field character set + if dsc[8] == 63: if self.charset != 'binary': try: return value.decode(self.charset) @@ 
-634,7 +635,7 @@ def _STRING_to_python(self, value, dsc=None): # pylint: disable=C0103 def _BLOB_to_python(self, value, dsc=None): # pylint: disable=C0103 """Convert BLOB data type to Python""" if dsc is not None: - if dsc[7] & FieldFlag.BINARY: + if dsc[8] == 63: if PY2: return value return bytes(value) diff --git a/lib/mysql/connector/protocol.py b/lib/mysql/connector/protocol.py index 47f4b99..b26f19f 100644 --- a/lib/mysql/connector/protocol.py +++ b/lib/mysql/connector/protocol.py @@ -250,7 +250,7 @@ def parse_column(self, packet, charset='utf-8'): (packet, _) = utils.read_lc_string(packet) # org_name try: - (_, _, field_type, + (charset_id, _, field_type, flags, _) = struct_unpack('use_unicode); if (!value) { @@ -2487,7 +2488,7 @@ MySQL_fetch_row(MySQL *self) } else if (field_type == MYSQL_TYPE_BLOB) { - value= mytopy_string(row[i], field_lengths[i], field_flags, + value= mytopy_string(row[i], field_lengths[i], field_charset_id, charset, self->use_unicode); PyTuple_SET_ITEM(result_row, i, value); } @@ -2500,7 +2501,7 @@ MySQL_fetch_row(MySQL *self) else { // Do our best to convert whatever we got from MySQL to a str/bytes - value = mytopy_string(row[i], field_lengths[i], field_flags, + value = mytopy_string(row[i], field_lengths[i], field_charset_id, charset, self->use_unicode); PyTuple_SET_ITEM(result_row, i, value); } diff --git a/src/mysql_capi_conversion.c b/src/mysql_capi_conversion.c index ecda11e..fcfe305 100644 --- a/src/mysql_capi_conversion.c +++ b/src/mysql_capi_conversion.c @@ -729,25 +729,25 @@ pytomy_decimal(PyObject *obj) @param data string to be converted @param length length of data - @param flags field flags - @param charset character used for decoding + @param charset_id character set of field + @param charset_out character set used for decoding @param use_unicode return Unicode @return Converted string - @retval PyUnicode if not BINARY_FLAG + @retval PyUnicode if not binary charset_id @retval PyBytes Python v3 if not use_unicode @retval PyString 
Python v2 if not use_unicode @retval NULL Exception */ PyObject* mytopy_string(const char *data, const unsigned long length, - const unsigned long flags, const char *charset, + const unsigned long charset_id, const char *charset_out, unsigned int use_unicode) { - if (!charset || !data) { + if (!charset_out || !data) { printf("\n==> here "); - if (charset) { - printf(" charset:%s", charset); + if (charset_out) { + printf(" charset:%s", charset_out); } if (data) { printf(" data:'%s'", data); @@ -756,9 +756,10 @@ mytopy_string(const char *data, const unsigned long length, return NULL; } - if (!(flags & BINARY_FLAG) && use_unicode && strcmp(charset, "binary") != 0) + // 63 designates binary character set for field + if (63 != charset_id && use_unicode && strcmp(charset_out, "binary") != 0) { - return PyUnicode_Decode(data, length, charset, NULL); + return PyUnicode_Decode(data, length, charset_out, NULL); } else {