Description:
Python connector tries to encode binary columns with default schema encoding. But these columns should imo always be returned as "bytes" or "bytearray". This way I get not consistent results when selecting for examples hashes -> mix of "bytearray" and "string" types.
How to repeat:
CREATE TABLE `tmp` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`bin` binary(10) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO tmp (bin) VALUES (0x48454c4c4f); # utf-8 "HELLO"
INSERT INTO tmp (bin) VALUES (0xFF7AB0DE50); # utf-8 garbage
In python:
cur.execute("SELECT * FROM tmp")
----->
(1, 'HELLO\x00\x00\x00\x00\x00') # string
(2, bytearray(b'\xffz\xb0\xdeP\x00\x00\x00\x00\x00')) # bytearray
Suggested fix:
in mysql.connector.conversion.MySQLConverter
def _STRING_to_python(self, value, dsc=None): # pylint: disable=C0103
"""
Note that a SET is a string too, but using the FieldFlag we can see
whether we have to split it.
Returns string typed columns as string type.
"""
if dsc is not None:
# Check if we deal with a SET
if dsc[7] & FieldFlag.SET:
return self._SET_to_python(value, dsc)
if dsc[7] & FieldFlag.BINARY:
return value
if self.charset == 'binary':
return value
if isinstance(value, (bytes, bytearray)) and self.use_unicode:
return value.decode(self.charset)
return value
in mysql.connector.protocol.MySQLProtocol - this fixes "prepared" cursors
def _parse_binary_values(self, fields, packet, charset='utf-8'):
"""Parse values from a binary result packet"""
null_bitmap_length = (len(fields) + 7 + 2) // 8
null_bitmap = [int(i) for i in packet[0:null_bitmap_length]]
packet = packet[null_bitmap_length:]
values = []
for pos, field in enumerate(fields):
if null_bitmap[int((pos + 2) / 8)] & (1 << (pos + 2) % 8):
values.append(None)
continue
elif field[1] in (FieldType.TINY, FieldType.SHORT,
FieldType.INT24,
FieldType.LONG, FieldType.LONGLONG):
(packet, value) = self._parse_binary_integer(packet, field)
values.append(value)
elif field[1] in (FieldType.DOUBLE, FieldType.FLOAT):
(packet, value) = self._parse_binary_float(packet, field)
values.append(value)
elif field[1] in (FieldType.DATETIME, FieldType.DATE,
FieldType.TIMESTAMP):
(packet, value) = self._parse_binary_timestamp(packet, field)
values.append(value)
elif field[1] == FieldType.TIME:
(packet, value) = self._parse_binary_time(packet, field)
values.append(value)
else:
(packet, value) = utils.read_lc_string(packet)
values.append(value if field[7] & FieldFlag.BINARY else value.decode(charset))
return tuple(values)