Description:
The Python connector tries to decode binary columns using the default schema encoding. In my opinion, these columns should always be returned as "bytes" or "bytearray". As it stands, I get inconsistent results when selecting, for example, hashes: a mix of "bytearray" and "string" types.
How to repeat:
CREATE TABLE `tmp` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`bin` binary(10) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO tmp (bin) VALUES (0x48454c4c4f); # utf-8 "HELLO"
INSERT INTO tmp (bin) VALUES (0xFF7AB0DE50); # utf-8 garbage
In python:
cur.execute("SELECT * FROM tmp")
----->
(1, 'HELLO\x00\x00\x00\x00\x00') # string
(2, bytearray(b'\xffz\xb0\xdeP\x00\x00\x00\x00\x00')) # bytearray
Suggested fix:
in mysql.connector.conversion.MySQLConverter
def _STRING_to_python(self, value, dsc=None):  # pylint: disable=C0103
    """Convert a string-typed column value to its Python representation.

    A SET is a string too, but the field flags tell us whether it has to
    be split, in which case the work is delegated to ``_SET_to_python``.

    Columns flagged as BINARY are returned untouched, so their values
    never depend on whether the payload happens to be decodable text.

    Args:
        value: Raw column value as received.
        dsc: Optional column description; index 7 is read as the field
            flags. When ``None`` no flag-based handling is applied.

    Returns:
        The result of ``_SET_to_python`` for SET columns; ``value``
        unchanged for BINARY columns, when ``self.charset`` is
        ``'binary'``, when ``self.use_unicode`` is false, or when
        ``value`` is not ``bytes``/``bytearray``; otherwise ``value``
        decoded with ``self.charset``.
    """
    if dsc is not None:
        field_flags = dsc[7]
        # Check if we deal with a SET
        if field_flags & FieldFlag.SET:
            return self._SET_to_python(value, dsc)
        # Binary columns must keep their raw bytes.
        if field_flags & FieldFlag.BINARY:
            return value

    if self.charset == 'binary':
        return value

    needs_decoding = (
        isinstance(value, (bytes, bytearray)) and self.use_unicode
    )
    return value.decode(self.charset) if needs_decoding else value
in mysql.connector.protocol.MySQLProtocol - this fixes "prepared" cursors
def _parse_binary_values(self, fields, packet, charset='utf-8'):
    """Parse the column values of one row from a binary result packet.

    The packet starts with a NULL bitmap, followed by the encoded values
    of the non-NULL columns in field order.

    Args:
        fields: Sequence of column descriptions; index 1 is read as the
            field type and index 7 as the field flags.
        packet: Bytes-like row payload, starting at the NULL bitmap.
        charset: Codec used to decode string values of columns that are
            not flagged as BINARY.

    Returns:
        A tuple with one entry per field; NULL columns yield ``None``.
    """
    # The bitmap of a binary result row starts at bit 2, hence the "+ 2"
    # both here and when locating a column's bit below.
    null_bitmap_length = (len(fields) + 7 + 2) // 8
    null_bitmap = packet[:null_bitmap_length]
    packet = packet[null_bitmap_length:]

    values = []
    for pos, field in enumerate(fields):
        byte_index, bit_index = divmod(pos + 2, 8)
        if null_bitmap[byte_index] & (1 << bit_index):
            # NULL columns carry no payload in the packet.
            values.append(None)
            continue

        field_type = field[1]
        # Each helper returns the remaining packet and the parsed value.
        if field_type in (FieldType.TINY, FieldType.SHORT,
                          FieldType.INT24,
                          FieldType.LONG, FieldType.LONGLONG):
            (packet, value) = self._parse_binary_integer(packet, field)
        elif field_type in (FieldType.DOUBLE, FieldType.FLOAT):
            (packet, value) = self._parse_binary_float(packet, field)
        elif field_type in (FieldType.DATETIME, FieldType.DATE,
                            FieldType.TIMESTAMP):
            (packet, value) = self._parse_binary_timestamp(packet, field)
        elif field_type == FieldType.TIME:
            (packet, value) = self._parse_binary_time(packet, field)
        else:
            (packet, value) = utils.read_lc_string(packet)
            # Binary columns keep their raw bytes; everything else is
            # decoded to text.
            if not field[7] & FieldFlag.BINARY:
                value = value.decode(charset)
        values.append(value)

    return tuple(values)
Description: Python connector tries to encode binary columns with default schema encoding. But these columns should imo always be returned as "bytes" or "bytearray". This way I get not consistent results when selecting for examples hashes -> mix of "bytearray" and "string" types. How to repeat: CREATE TABLE `tmp` ( `id` int(11) unsigned NOT NULL AUTO_INCREMENT, `bin` binary(10) DEFAULT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; INSERT INTO tmp (bin) VALUES (0x48454c4c4f); # utf-8 "HELLO" INSERT INTO tmp (bin) VALUES (0xFF7AB0DE50); # utf-8 garbage In python: cur.execute("SELECT * FROM tmp") -----> (1, 'HELLO\x00\x00\x00\x00\x00') # string (2, bytearray(b'\xffz\xb0\xdeP\x00\x00\x00\x00\x00')) # bytearray Suggested fix: in mysql.connector.conversion.MySQLConverter def _STRING_to_python(self, value, dsc=None): # pylint: disable=C0103 """ Note that a SET is a string too, but using the FieldFlag we can see whether we have to split it. Returns string typed columns as string type. 
""" if dsc is not None: # Check if we deal with a SET if dsc[7] & FieldFlag.SET: return self._SET_to_python(value, dsc) if dsc[7] & FieldFlag.BINARY: return value if self.charset == 'binary': return value if isinstance(value, (bytes, bytearray)) and self.use_unicode: return value.decode(self.charset) return value in mysql.connector.protocol.MySQLProtocol - this fixes "prepared" cursors def _parse_binary_values(self, fields, packet, charset='utf-8'): """Parse values from a binary result packet""" null_bitmap_length = (len(fields) + 7 + 2) // 8 null_bitmap = [int(i) for i in packet[0:null_bitmap_length]] packet = packet[null_bitmap_length:] values = [] for pos, field in enumerate(fields): if null_bitmap[int((pos + 2) / 8)] & (1 << (pos + 2) % 8): values.append(None) continue elif field[1] in (FieldType.TINY, FieldType.SHORT, FieldType.INT24, FieldType.LONG, FieldType.LONGLONG): (packet, value) = self._parse_binary_integer(packet, field) values.append(value) elif field[1] in (FieldType.DOUBLE, FieldType.FLOAT): (packet, value) = self._parse_binary_float(packet, field) values.append(value) elif field[1] in (FieldType.DATETIME, FieldType.DATE, FieldType.TIMESTAMP): (packet, value) = self._parse_binary_timestamp(packet, field) values.append(value) elif field[1] == FieldType.TIME: (packet, value) = self._parse_binary_time(packet, field) values.append(value) else: (packet, value) = utils.read_lc_string(packet) values.append(value if field[7] & FieldFlag.BINARY else value.decode(charset)) return tuple(values)