Description:
When running mysqlsh util.dumpInstance() on a table with a composite PRIMARY KEY (three BIGINT columns), the chunking queries generated by the dump process do not use the entire PK in their range conditions; only the first key column, k1, appears in the WHERE clause.
This leads to unbalanced chunks and can degrade performance during both dump and restore.
For example:
CREATE TABLE bug_pk3 (
  k1 BIGINT NOT NULL,
  k2 BIGINT NOT NULL,
  k3 BIGINT NOT NULL,
  PRIMARY KEY (k1, k2, k3)
) ENGINE=InnoDB;
2025-10-23T16:20:03.970852Z 19 Query EXPLAIN FORMAT=JSON SELECT COUNT(*) FROM `test`.`bug_pk3` WHERE `k1` BETWEEN 1 AND 2 ORDER BY `k1`,`k2`,`k3`/* mysqlsh dumpInstance, chunking table `test`.`bug_pk3`, ID: 0 */
2025-10-23T16:20:04.002270Z 21 Query SELECT `k1`,`k2`,`k3` FROM `test`.`bug_pk3` WHERE (`k1`=5) ORDER BY `k1`,`k2`,`k3` /* mysqlsh dumpInstance, dumping table `test`.`bug_pk3`, ID: chunk 4 */
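Note that both queries constrain only the first key column, `k1`. If the full key tuple were used, a chunk boundary would instead be expressed as a row-constructor comparison, which InnoDB can serve with a single range scan on the PRIMARY KEY. A hypothetical example of such a boundary condition (the boundary values here are made up for illustration):
SELECT `k1`,`k2`,`k3` FROM `test`.`bug_pk3`
WHERE (`k1`,`k2`,`k3`) > (5,10,0) AND (`k1`,`k2`,`k3`) <= (6,3,42)
ORDER BY `k1`,`k2`,`k3`;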
How to repeat:
/* Create table and load data */
DROP TABLE IF EXISTS bug_pk3;
CREATE TABLE bug_pk3 (
  k1 BIGINT NOT NULL,
  k2 BIGINT NOT NULL,
  k3 BIGINT NOT NULL,
  PRIMARY KEY (k1, k2, k3)
) ENGINE=InnoDB;
-- Use numbers table technique (non-recursive)
INSERT INTO bug_pk3 (k1, k2, k3)
SELECT
  a.n AS k1,
  b.n AS k2,
  c.n AS k3
FROM
  (SELECT @rownum:=@rownum+1 AS n FROM
     (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
      UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) t1,
     (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
      UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) t2,
     (SELECT @rownum:=0) r
  ) a
  CROSS JOIN (
    SELECT @rownum2:=@rownum2+1 AS n FROM
      (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
       UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) t3,
      (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
       UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) t4,
      (SELECT @rownum2:=0) r2
  ) b
  CROSS JOIN (
    SELECT @rownum3:=@rownum3+1 AS n FROM
      (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
       UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) t5,
      (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
       UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) t6,
      (SELECT @rownum3:=0) r3
  ) c
LIMIT 1000000;
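-- On MySQL 8.0+, a recursive CTE is an arguably more readable alternative.
-- This sketch loads the same 100 x 100 x 100 key space (it stays within the
-- default cte_max_recursion_depth of 1000):
INSERT INTO bug_pk3 (k1, k2, k3)
WITH RECURSIVE seq (n) AS (
  SELECT 1
  UNION ALL
  SELECT n + 1 FROM seq WHERE n < 100
)
SELECT a.n, b.n, c.n
FROM seq a
CROSS JOIN seq b
CROSS JOIN seq c;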
/* Enable general log to verify */
SET GLOBAL general_log=1;
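/* Optionally, route the general log to a table so the generated queries can be
   inspected with plain SQL (standard MySQL settings; not required for the repro) */
SET GLOBAL log_output = 'TABLE';
-- After running the dump:
SELECT event_time, argument
FROM mysql.general_log
WHERE argument LIKE '%bug_pk3%';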
/* Dump data in the smallest possible chunks to make the issue easier to reproduce */
util.dumpInstance("./", {
  threads: 4,
  dryRun: false,
  consistent: true,
  compatibility: ["strip_definers"],
  bytesPerChunk: "128K",
  maxRate: "500M",
  ocimds: false,
  showProgress: true
});
Suggested fix:
Using the full composite primary key for chunking ensures balanced data distribution and consistent performance. Tools like pt-archiver implement this approach by iterating over the complete primary key tuple (k1, k2, k3) in lexicographical order, using the last processed key as the continuation point for the next batch. This enables efficient index-range scans and avoids unbalanced chunks that can occur when only part of the key is used.
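For illustration, a keyset-pagination batch in the style of pt-archiver could look like the sketch below; the batch size and boundary values are placeholders:

-- First batch: start at the beginning of the index.
SELECT k1, k2, k3
FROM test.bug_pk3
ORDER BY k1, k2, k3
LIMIT 10000;

-- Next batch: resume strictly after the last key of the previous batch
-- (here (5, 10, 0) as a placeholder); the row-constructor comparison is
-- resolved as a range scan on the full PRIMARY KEY.
SELECT k1, k2, k3
FROM test.bug_pk3
WHERE (k1, k2, k3) > (5, 10, 0)
ORDER BY k1, k2, k3
LIMIT 10000;

pt-archiver itself emits the expanded equivalent predicate, k1 > 5 OR (k1 = 5 AND k2 > 10) OR (k1 = 5 AND k2 = 10 AND k3 > 0), for the benefit of older optimizers; recent MySQL versions should resolve either form as an index range scan.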