From 1b750d4c90d5130aec9763fe98f191c424a798d0 Mon Sep 17 00:00:00 2001 From: Venkatesh Prasad Date: Thu, 6 Jun 2024 14:17:30 +0530 Subject: [PATCH] Bug#114830 MySQL converts collation of date data type in ibd but data dictionary https://bugs.mysql.com/bug.php?id=114830 Problem ======= When MySQL converts the charset of a table, it converts the date and time data types columns in data dictionary and SDI. However, the new collation_id in the sdi does not match that of the Field object. Analysis ======== In MySQL, temporal types are always stored and compared using my_charset_latin1 charset. During the execution of ALTER TABLE CONVERT TO CHARACTER SET, MySQL changes the charset and collation stored in data-dictionary and SDI for temporal columns to a different collation_id. In practice, this new collation_id is ignored when such columns are stored/compared. Corresponding Field objects are not updated to use this new collation (Actually, Field objects for temporal types are hardcoded to use my_charset_latin1). This new collation is not visible in I_S and SHOW CREATE TABLE output. It will be ignored by CREATE TABLE LIKE and rewritten by ALTER TABLE since both these statements use info from Field objects of source table to produce Create_field objects describing columns of new table/new version of the table. For example: CREATE TABLE a(dt DATETIME); $ ./bin/ibd2sdi ./var/mysqld.1/data/test/a.ibd "name": "dt", "collation_id": 8 ALTER TABLE a CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; $ ./bin/ibd2sdi ./var/mysqld.1/data/test/a.ibd "name": "dt", "collation_id": 224 ALTER TABLE a ENGINE = INNODB; $ ./bin/ibd2sdi ./var/mysqld.1/data/test/a.ibd "name": "dt", "collation_id": 8 Particularly, for new temporal types that support fractional seconds collation_id from DD/SDI affects the InnoDB "precise type" describing the column and stored in .cfg. Because of this, incorrect collation_id in DD/SDI for such columns results in incorrect "precise type" in .cfg file. As consequence we get Schema mismatch `(Column a precise type mismatch.)` error when ALTER TABLE IMPORT TABLESPACE compares the "precise type" from such .cfg file with "precise type" for column in table version being imported into. The latter is based on collation_id which ultimately comes from Field object and always corresponds to my_charset_latin1 for temporal types. Note that this problem do not affect scenarios when we import table with .cfg file that was generated by MySQL's FLUSH TABLE FOR EXPORT command, as the latter doesn't use DD/SDI to calculate "precise type" but gets information from Field object in table being exported instead. Solution ======== This commit changes ALTER TABLE CONVERT TO CHARACTER SET command to not alter the character set for temporal types stored in data-dictionary/SDI. In other words, we now force the server to always use my_charset_latin1 in DD/SDI for temporal types. Approved by: Dmitry Lenev --- mysql-test/r/temporal_type_collation.result | 95 +++++++++++++++ mysql-test/t/temporal_type_collation.test | 126 ++++++++++++++++++++ sql/sql_table.cc | 6 + 3 files changed, 227 insertions(+) create mode 100644 mysql-test/r/temporal_type_collation.result create mode 100644 mysql-test/t/temporal_type_collation.test diff --git a/mysql-test/r/temporal_type_collation.result b/mysql-test/r/temporal_type_collation.result new file mode 100644 index 000000000000..aad33f3461cf --- /dev/null +++ b/mysql-test/r/temporal_type_collation.result @@ -0,0 +1,95 @@ +CREATE TABLE test ( +dt datetime primary key, +datetxt varchar(10) GENERATED ALWAYS AS (DATE(dt)) STORED, +timetxt varchar(10) GENERATED ALWAYS AS (TIME(dt)) STORED +) ENGINE=InnoDB; +SHOW CREATE TABLE test; +Table Create Table +test CREATE TABLE `test` ( + `dt` datetime NOT NULL, + `datetxt` varchar(10) GENERATED ALWAYS AS (cast(`dt` as date)) STORED, + `timetxt` varchar(10) GENERATED ALWAYS AS (cast(`dt` as time)) STORED, + PRIMARY KEY (`dt`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci +FLUSH TABLE test FOR EXPORT; +UNLOCK TABLES; +# +# Assert that the collation_id is my_charset_numericd in the SDI. +# +include/assert_grep.inc [Assert that collation_id is 8 in the sdi] +# +# Assert that the collation_id does not change after ALTER TABLE +# ENGINE=InnoDB +# +ALTER TABLE test ENGINE=InnoDB; +include/assert_grep.inc [Assert that collation_id is 8 in the sdi] +# +# Assert that the collation_id does not change after ALTER TABLE CONVERT +# TO CHARACTER SET +# +ALTER TABLE test CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci; +include/assert_grep.inc [Assert that collation_id is 8 in the sdi] +# +# Insert some data and verify that comparision with varchar columns are handled properly. +# +INSERT INTO test(dt) VALUES("1997-11-30 12:30:45"),("1999-12-13 10:11:53"); +SELECT * FROM test; +dt datetxt timetxt +1997-11-30 12:30:45 1997-11-30 12:30:45 +1999-12-13 10:11:53 1999-12-13 10:11:53 +# +# Verify that there are two rows in the table +# +SELECT * FROM test WHERE DATE(dt) = datetxt; +dt datetxt timetxt +1997-11-30 12:30:45 1997-11-30 12:30:45 +1999-12-13 10:11:53 1999-12-13 10:11:53 +SELECT * FROM test WHERE TIME(dt) = timetxt; +dt datetxt timetxt +1997-11-30 12:30:45 1997-11-30 12:30:45 +1999-12-13 10:11:53 1999-12-13 10:11:53 +# +# Change the character set of the table to utf8mb4_unicode_ci +# +ALTER TABLE test CONVERT TO CHARACTER SET utf8mb4 collate utf8mb4_unicode_ci; +# +# Verify that the comparision works after changing the charset of the table +# +SELECT * FROM test WHERE DATE(dt) = datetxt; +dt datetxt timetxt +1997-11-30 12:30:45 1997-11-30 12:30:45 +1999-12-13 10:11:53 1999-12-13 10:11:53 +SELECT * FROM test WHERE TIME(dt) = timetxt; +dt datetxt timetxt +1997-11-30 12:30:45 1997-11-30 12:30:45 +1999-12-13 10:11:53 1999-12-13 10:11:53 +# +# Verify that the comparision works after changing the datatype of dt column +# +ALTER TABLE test MODIFY dt TIMESTAMP(6); +INSERT INTO test(dt) VALUES("1999-08-15 11:38:25.123456"),("2007-07-29 11:15:34.245147"); +# +# Change the character set of the table to utf32_unicode_ci +# +ALTER TABLE test CONVERT TO CHARACTER SET utf32 COLLATE utf32_unicode_ci; +# +# Verify that there are four rows in the table +# Note: We use STUBSTRING_INDEX to do the comparision as the +# TIME(dt) returns decimal value as well. +# +# +# Verify that the comparision works after changing the charset of the table +# +SELECT * FROM test WHERE DATE(dt) = datetxt; +dt datetxt timetxt +1997-11-30 12:30:45.000000 1997-11-30 12:30:45 +1999-08-15 11:38:25.123456 1999-08-15 11:38:25 +1999-12-13 10:11:53.000000 1999-12-13 10:11:53 +2007-07-29 11:15:34.245147 2007-07-29 11:15:34 +SELECT * FROM test WHERE SUBSTRING_INDEX(TIME(dt),'.',1) = timetxt; +dt datetxt timetxt +1997-11-30 12:30:45.000000 1997-11-30 12:30:45 +1999-08-15 11:38:25.123456 1999-08-15 11:38:25 +1999-12-13 10:11:53.000000 1999-12-13 10:11:53 +2007-07-29 11:15:34.245147 2007-07-29 11:15:34 +DROP TABLE test; diff --git a/mysql-test/t/temporal_type_collation.test b/mysql-test/t/temporal_type_collation.test new file mode 100644 index 000000000000..defcfa31224e --- /dev/null +++ b/mysql-test/t/temporal_type_collation.test @@ -0,0 +1,126 @@ +################################################################################ +# === Purpose === +# This test ensures that server does not modify the charset and collation of +# temporal types and comparision of temporal values with other charsets are +# handled smoothly by the server. +# +# === References === +# Bug#114830: MySQL converts collation of date data type in ibd but data dictionary +# PS-9219: MySQL converts collection of date data type in ibd but data dictionary +################################################################################ + +CREATE TABLE test ( + dt datetime primary key, + datetxt varchar(10) GENERATED ALWAYS AS (DATE(dt)) STORED, + timetxt varchar(10) GENERATED ALWAYS AS (TIME(dt)) STORED +) ENGINE=InnoDB; + +SHOW CREATE TABLE test; + +# Flush all pages to disk before running ibd2sdi +FLUSH TABLE test FOR EXPORT; +UNLOCK TABLES; + +################################################################################ +# Test - 1: Verify that collation of temporal types remains as +# my_charset_numeric (collation_id 8) in the SDI. +################################################################################ +--let $MYSQLD_DATADIR=`SELECT @@DATADIR` +--exec $IBD2SDI $MYSQLD_DATADIR/test/test.ibd -d $MYSQL_TMP_DIR/test.json 2>&1 + +--echo # +--echo # Assert that the collation_id is my_charset_numericd in the SDI. +--echo # + +--let $assert_select= \"collation_id\": 8, +--let $assert_text= Assert that collation_id is 8 in the sdi +--let $assert_file= $MYSQL_TMP_DIR/test.json +--let $assert_only_after= \"column_type_utf8\": \"datetime\", +--let $assert_count= 1 +--source include/assert_grep.inc + +--echo # +--echo # Assert that the collation_id does not change after ALTER TABLE +--echo # ENGINE=InnoDB +--echo # +ALTER TABLE test ENGINE=InnoDB; + +--let $assert_select= \"collation_id\": 8, +--let $assert_text= Assert that collation_id is 8 in the sdi +--let $assert_file= $MYSQL_TMP_DIR/test.json +--let $assert_only_after= \"column_type_utf8\": \"datetime\", +--let $assert_count= 1 +--source include/assert_grep.inc + +--echo # +--echo # Assert that the collation_id does not change after ALTER TABLE CONVERT +--echo # TO CHARACTER SET +--echo # +ALTER TABLE test CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci; + +--let $assert_select= \"collation_id\": 8, +--let $assert_text= Assert that collation_id is 8 in the sdi +--let $assert_file= $MYSQL_TMP_DIR/test.json +--let $assert_only_after= \"column_type_utf8\": \"datetime\", +--let $assert_count= 1 +--source include/assert_grep.inc + + +################################################################################ +# Test - 2: Verify that comparision with varchar columns are handled properly. +################################################################################ +--echo # +--echo # Insert some data and verify that comparision with varchar columns are handled properly. +--echo # +INSERT INTO test(dt) VALUES("1997-11-30 12:30:45"),("1999-12-13 10:11:53"); +SELECT * FROM test; + +--echo # +--echo # Verify that there are two rows in the table +--echo # +--assert(`SELECT COUNT(*)=2 FROM test WHERE DATE(dt) = datetxt`) +--assert(`SELECT COUNT(*)=2 FROM test WHERE TIME(dt) = timetxt`) + +SELECT * FROM test WHERE DATE(dt) = datetxt; +SELECT * FROM test WHERE TIME(dt) = timetxt; + +--echo # +--echo # Change the character set of the table to utf8mb4_unicode_ci +--echo # +ALTER TABLE test CONVERT TO CHARACTER SET utf8mb4 collate utf8mb4_unicode_ci; + +--echo # +--echo # Verify that the comparision works after changing the charset of the table +--echo # +SELECT * FROM test WHERE DATE(dt) = datetxt; +SELECT * FROM test WHERE TIME(dt) = timetxt; + +--echo # +--echo # Verify that the comparision works after changing the datatype of dt column +--echo # +ALTER TABLE test MODIFY dt TIMESTAMP(6); +INSERT INTO test(dt) VALUES("1999-08-15 11:38:25.123456"),("2007-07-29 11:15:34.245147"); + + +--echo # +--echo # Change the character set of the table to utf32_unicode_ci +--echo # +ALTER TABLE test CONVERT TO CHARACTER SET utf32 COLLATE utf32_unicode_ci; + +--echo # +--echo # Verify that there are four rows in the table +--echo # Note: We use STUBSTRING_INDEX to do the comparision as the +--echo # TIME(dt) returns decimal value as well. +--echo # +--assert(`SELECT COUNT(*)=4 FROM test WHERE DATE(dt) = datetxt`) +--assert(`SELECT COUNT(*)=4 FROM test WHERE SUBSTRING_INDEX(TIME(dt),'.',1) = timetxt`) + +--echo # +--echo # Verify that the comparision works after changing the charset of the table +--echo # +SELECT * FROM test WHERE DATE(dt) = datetxt; +SELECT * FROM test WHERE SUBSTRING_INDEX(TIME(dt),'.',1) = timetxt; + +# Cleanup +--remove_file $MYSQL_TMP_DIR/test.json +DROP TABLE test; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 6438d9e25edc..a5ef925d9026 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -4180,6 +4180,12 @@ const CHARSET_INFO *get_sql_field_charset(const Create_field *sql_field, */ if (sql_field->is_array || cs == &my_charset_bin) return cs; + /* + Temporal types always use my_charset_numeric. + Ensure that it is not changed when altering table charset. + */ + if (is_temporal_real_type(sql_field->sql_type)) return &my_charset_numeric; + /* table_charset is set only in ALTER TABLE t1 CONVERT TO CHARACTER SET csname when we want to change character set for all varchar/char columns.