Bug #110071 Foreign Key should not be added to Writeset-based Dependency Tracking.
Submitted: 15 Feb 2023 6:41 Modified: 15 Feb 2023 8:10
Reporter: Shun Yi Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Row Based Replication ( RBR ) Severity:S2 (Serious)
Version:Any, 5.7, 8.0 OS:Any
Assigned to: CPU Architecture:Any

[15 Feb 2023 6:41] Shun Yi
Description:
The parent table has primary key.
The child table has a primary key, and the foreign key reference to the primary key of the parent table.

If some rows are inserted to parent, the Dependency Tracking will revert to COMMIT_ORDER and m_writeset_history will be clear.
There is no problem.

However, if we insert some rows to child with the same foreign keys, the same foreign key will be added to writeset and the rows can not be applied in parallel in the replicas.
Although the rows are different which can be applied in parallel, the last_committed is not the same.

================
Analyze
================
If child table does have primary key, the rows' conflict should be checked by the primary key and the checking of foreign key is useless.

If child table does not have primary key, the rows' conflict will be checked by COMMIT_ORDER.

when any rows are inserted to parent, the m_writeset_history will be clear.

How to repeat:
================
1. SQL
================

mysql> show variables like "%_dependency%";
+--------------------------------------------+----------+
| Variable_name                              | Value    |
+--------------------------------------------+----------+
| binlog_transaction_dependency_history_size | 25000    |
| binlog_transaction_dependency_tracking     | WRITESET |
+--------------------------------------------+----------+
2 rows in set (0.01 sec)

mysql> show variables like "%transaction_write_set_extraction";
+----------------------------------+----------+
| Variable_name                    | Value    |
+----------------------------------+----------+
| transaction_write_set_extraction | XXHASH64 |
+----------------------------------+----------+
1 row in set (0.01 sec)

mysql> CREATE TABLE parent (
    ->     id INT PRIMARY KEY,
    ->     did INT NOT NULL
    -> ) ENGINE=INNODB;
Query OK, 0 rows affected (0.05 sec)

mysql> CREATE TABLE child (
    ->     id INT PRIMARY KEY,
    ->     parent_id INT,
    ->     FOREIGN KEY (parent_id)
    ->         REFERENCES parent(id)
    -> ) ENGINE=INNODB;
Query OK, 0 rows affected (0.06 sec)

mysql> reset master;

mysql> insert into parent values (1, 1);
mysql> insert into parent values (2, 1);
mysql> select * from parent;
+----+-----+
| id | did |
+----+-----+
|  1 |   1 |
|  2 |   1 |
+----+-----+
2 rows in set (0.00 sec)

mysql> insert into child values (1, 1);
mysql> insert into child values (2, 1);
mysql> insert into child values (3, 1);
mysql> select * from child;
+----+-----------+
| id | parent_id |
+----+-----------+
|  1 |         1 |
|  2 |         1 |
|  3 |         1 |
+----+-----------+
3 rows in set (0.00 sec)

================
2. Binary log
================

#230215 12:28:09 server id 1  end_log_pos 236 CRC32 0x82d25b3c 	GTID	last_committed=0	sequence_number=1	rbr_only=yes	original_committed_timestamp=1676435289059339	immediate_commit_timestamp=1676435289059339	transaction_length=284
#230215 12:28:12 server id 1  end_log_pos 520 CRC32 0xfedefda1 	GTID	last_committed=1	sequence_number=2	rbr_only=yes	original_committed_timestamp=1676435292243890	immediate_commit_timestamp=1676435292243890	transaction_length=284
#230215 12:28:22 server id 1  end_log_pos 804 CRC32 0xd6226b5b 	GTID	last_committed=2	sequence_number=3	rbr_only=yes	original_committed_timestamp=1676435302868370	immediate_commit_timestamp=1676435302868370	transaction_length=281
#230215 12:28:52 server id 1  end_log_pos 1085 CRC32 0x58a7e98f 	GTID	last_committed=3	sequence_number=4rbr_only=yes	original_committed_timestamp=1676435332927335	immediate_commit_timestamp=1676435332927335	transaction_length=281
#230215 12:29:00 server id 1  end_log_pos 1366 CRC32 0x38bbb272 	GTID	last_committed=4	sequence_number=5rbr_only=yes	original_committed_timestamp=1676435340132907	immediate_commit_timestamp=1676435340132907	transaction_length=281

Suggested fix:

We should ignore generating the foreign key information as they should not participate in the conflicts detecting algorithm.

diff --git a/sql/rpl_write_set_handler.cc b/sql/rpl_write_set_handler.cc
index 8beaab2ddd8..bb0827b8ee9 100644
--- a/sql/rpl_write_set_handler.cc
+++ b/sql/rpl_write_set_handler.cc
@@ -719,131 +719,6 @@ bool add_pke(TABLE *table, THD *thd, const uchar *record) {
       }
     }
 
-    /*
-      Foreign keys handling.
-
-      OPTION_NO_FOREIGN_KEY_CHECKS bit in options_bits is set at two places
-
-      1) If the user executed 'SET foreign_key_checks= 0' on the local session
-      before executing the query.
-      or
-      2) We are applying a RBR event (i.e., the event is from a remote server)
-      and logic in Rows_log_event::do_apply_event found out that the event is
-      generated from a remote server session that disabled foreign_key_checks
-      (using 'SET foreign_key_checks=0').
-
-      In either of the above cases (i.e., the foreign key check is disabled for
-      the current query/current event), we should ignore generating
-      the foreign key information as they should not participate
-      in the conflicts detecting algorithm.
-    */
-    if (!(thd->variables.option_bits & OPTION_NO_FOREIGN_KEY_CHECKS) &&
-        table->s->foreign_keys > 0) {
-      TABLE_SHARE_FOREIGN_KEY_INFO *fk = table->s->foreign_key;
-      for (uint fk_number = 0; fk_number < table->s->foreign_keys;
-           fk_number++) {
-        /*
-          There are two situations on which there is no
-          unique_constraint_name, which means that the foreign key
-          must be skipped.
-
-          1) The referenced table was dropped using
-             foreign_key_checks= 0, on that case we cannot check
-             foreign key and need to skip it.
-
-          2) The foreign key does reference a non unique key, thence
-             it must be skipped since it cannot be used to check
-             conflicts/dependencies.
-
-             Example:
-               CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT, KEY(c2));
-               CREATE TABLE t2 (x1 INT PRIMARY KEY, x2 INT,
-                                FOREIGN KEY (x2) REFERENCES t1(c2));
-
-               DELETE FROM t1 WHERE c1=1;
-                 does generate the PKEs:
-                   PRIMARY½test½4t1½21½1
-
-               INSERT INTO t2 VALUES (1,1);
-                 does generate the PKEs:
-                   PRIMARY½test½4t2½21½1
-
-               which does not contain PKE for the non unique key c2.
-        */
-        if (0 == fk[fk_number].unique_constraint_name.length) continue;
-
-        const std::string referenced_schema_name_length =
-            std::to_string(fk[fk_number].referenced_table_db.length);
-        const std::string referenced_table_name_length =
-            std::to_string(fk[fk_number].referenced_table_name.length);
-
-        /*
-          Prefix the hash keys with the referenced index name.
-        */
-        pke.clear();
-        pke.append(fk[fk_number].unique_constraint_name.str,
-                   fk[fk_number].unique_constraint_name.length);
-        pke.append(HASH_STRING_SEPARATOR);
-        pke.append(fk[fk_number].referenced_table_db.str,
-                   fk[fk_number].referenced_table_db.length);
-        pke.append(HASH_STRING_SEPARATOR);
-        pke.append(referenced_schema_name_length);
-        pke.append(fk[fk_number].referenced_table_name.str,
-                   fk[fk_number].referenced_table_name.length);
-        pke.append(HASH_STRING_SEPARATOR);
-        pke.append(referenced_table_name_length);
-
-        /*
-          Foreign key must not have a empty column list.
-        */
-        assert(fk[fk_number].columns > 0);
-        for (uint c = 0; c < fk[fk_number].columns; c++) {
-          for (uint field_number = 0; field_number < table->s->fields;
-               field_number++) {
-            Field *field = table->field[field_number];
-            if (field->is_null(ptrdiff)) continue;
-
-            if (!my_strcasecmp(system_charset_info, field->field_name,
-                               fk[fk_number].column_name[c].str)) {
-              /*
-                Update the field offset, since we may be operating on
-                table->record[0] or table->record[1] and both have
-                different offsets.
-              */
-              field->move_field_offset(ptrdiff);
-
-              const CHARSET_INFO *cs = field->charset();
-              int max_length = cs->coll->strnxfrmlen(cs, field->pack_length());
-              std::unique_ptr<uchar[]> pk_value(new uchar[max_length + 1]());
-
-              /*
-                convert to normalized string and store so that it can be
-                sorted using binary comparison functions like memcmp.
-              */
-              size_t length = field->make_sort_key(pk_value.get(), max_length);
-              pk_value[length] = 0;
-
-              pke.append(pointer_cast<char *>(pk_value.get()), length);
-              pke.append(HASH_STRING_SEPARATOR);
-              pke.append(std::to_string(length));
-
-              /* revert the field object record offset back */
-              field->move_field_offset(-ptrdiff);
-            }
-          }
-        }
-
-        if (generate_hash_pke(pke, thd
-#ifndef NDEBUG
-                              ,
-                              write_sets, hash_list
-#endif
-                              ))
-          return true;
-        writeset_hashes_added = true;
-      }
-    }
-
     if (table->s->foreign_key_parents > 0)
       ws_ctx->set_has_related_foreign_keys();
[15 Feb 2023 8:10] MySQL Verification Team
Hello Shun Yi,

Thank you for the report and feedback.

regards,
Umesh