Description:
For prepare statement + left join scenario, optimizer can not choose the optimal index, which result in generate many slow queries for production environment.
Normal select query can choose the optimal index.
Compared the optimizer trace, we can see Prepare/Execute query do not do "analyzing_range_alternatives" optimization, do not trigger index dive mechanisms to estimate the rows for specific equal condition, result in choose wrong index.
How to repeat:
CREATE TABLE t(a INT);
INSERT INTO t
WITH RECURSIVE tt (n) AS (
SELECT 1
UNION ALL
SELECT n + 1 FROM tt WHERE n < 10
)
SELECT n FROM tt;
CREATE TABLE `doc_order_header` (
`organizationId` varchar(10) NOT NULL DEFAULT 'NAIXUE',
`warehouseId` varchar(10) NOT NULL,
`orderNo` varchar(10) NOT NULL,
`waveNo` varchar(10) DEFAULT '*',
`soStatus` varchar(2) NOT NULL DEFAULT '00',
PRIMARY KEY (`organizationId`,`warehouseId`,`orderNo`),
KEY `I_DOC_ORDER_HEADER_OWW` (`organizationId`,`warehouseId`,`waveNo`)
)ENGINE=InnoDB;
INSERT INTO doc_order_header(warehouseId,waveNo,orderNo) SELECT t1.a, t2.a, t1.a*10+t2.a FROM t AS t1, t AS t2;
ANALYZE TABLE doc_order_header;
INSERT INTO t
WITH RECURSIVE tt (n) AS (
SELECT 11
UNION ALL
SELECT n + 1 FROM tt WHERE n < 100
)
SELECT n FROM tt;
CREATE TABLE `tsk_tasklists` (
`organizationId` varchar(10) NOT NULL DEFAULT 'NAIXUE',
`warehouseId` varchar(10) NOT NULL,
`taskId` varchar(10) NOT NULL,
`taskId_Sequence` varchar(10) NOT NULL,
`docNo` varchar(10) NOT NULL,
`groupTaskId` varchar(50) DEFAULT '',
PRIMARY KEY (`organizationId`,`warehouseId`,`taskId`,`taskId_Sequence`),
KEY `I_TSK_TASKLISTS_BYDOCNO` (`organizationId`,`warehouseId`,`docNo`)
)ENGINE=InnoDB;
INSERT INTO tsk_tasklists(warehouseId,taskId,taskId_Sequence,docNo) SELECT t1.a, t2.a,t1.a*100+t2.a,t2.a % 2 FROM t AS t1, t AS t2;
INSERT INTO tsk_tasklists(warehouseId,taskId,taskId_Sequence,docNo) SELECT 8, t2.a, t1.a*100+t2.a+10000,t2.a % 2 FROM t AS t1, t AS t2;
ANALYZE TABLE tsk_tasklists;
-- Table T choose index I_TSK_TASKLISTS_BYDOCNO
EXPLAIN
SELECT
A.organizationId,
A.warehouseId,
A.orderNo
FROM
doc_order_header A
LEFT JOIN tsk_tasklists T
ON A.organizationId = T.organizationId AND A.warehouseId = T.warehouseId AND A.orderNo = T.docNo
WHERE
A.organizationId = 'NAIXUE'
AND A.warehouseId = '8'
AND A.waveNo = '2'
AND (T.groupTaskId IS NULL OR T.groupTaskId = '');
prepare stmt1 from "
EXPLAIN
SELECT
A.organizationId,
A.warehouseId,
A.orderNo
FROM
doc_order_header A
LEFT JOIN tsk_tasklists T
ON A.organizationId = T.organizationId AND A.warehouseId = T.warehouseId AND A.orderNo = T.docNo
WHERE
A.organizationId = 'NAIXUE'
AND A.warehouseId = '8'
AND A.waveNo = '2'
AND (T.groupTaskId IS NULL OR T.groupTaskId = '')";
-- Table T choose primary, which is not the optimal index, much slower than choose index I_TSK_TASKLISTS_BYDOCNO
EXECUTE stmt1;
Suggested fix:
MySQL 8.0.32 introduce this issue. Refer to following commit:
https://github.com/mysql/mysql-server/commit/ddfe4e476ed6ecc7d76d79260a796bed9b3dde9f
Due to Item_cond_and copy_andor_structure from Table_ref::m_join_cond in function get_optimizable_join_conditions, which result in JOIN_TAB::join_cond() is different with JOIN_TAB::table_ref::join_cond().
We should change `condition = t->join_cond()` TO `condition = t->join_cond_optim()`
Description: For prepare statement + left join scenario, optimizer can not choose the optimal index, which result in generate many slow queries for production environment. Normal select query can choose the optimal index. Compared the optimizer trace, we can see Prepare/Execute query do not do "analyzing_range_alternatives" optimization, do not trigger index dive mechanisms to estimate the rows for specific equal condition, result in choose wrong index. How to repeat: CREATE TABLE t(a INT); INSERT INTO t WITH RECURSIVE tt (n) AS ( SELECT 1 UNION ALL SELECT n + 1 FROM tt WHERE n < 10 ) SELECT n FROM tt; CREATE TABLE `doc_order_header` ( `organizationId` varchar(10) NOT NULL DEFAULT 'NAIXUE', `warehouseId` varchar(10) NOT NULL, `orderNo` varchar(10) NOT NULL, `waveNo` varchar(10) DEFAULT '*', `soStatus` varchar(2) NOT NULL DEFAULT '00', PRIMARY KEY (`organizationId`,`warehouseId`,`orderNo`), KEY `I_DOC_ORDER_HEADER_OWW` (`organizationId`,`warehouseId`,`waveNo`) )ENGINE=InnoDB; INSERT INTO doc_order_header(warehouseId,waveNo,orderNo) SELECT t1.a, t2.a, t1.a*10+t2.a FROM t AS t1, t AS t2; ANALYZE TABLE doc_order_header; INSERT INTO t WITH RECURSIVE tt (n) AS ( SELECT 11 UNION ALL SELECT n + 1 FROM tt WHERE n < 100 ) SELECT n FROM tt; CREATE TABLE `tsk_tasklists` ( `organizationId` varchar(10) NOT NULL DEFAULT 'NAIXUE', `warehouseId` varchar(10) NOT NULL, `taskId` varchar(10) NOT NULL, `taskId_Sequence` varchar(10) NOT NULL, `docNo` varchar(10) NOT NULL, `groupTaskId` varchar(50) DEFAULT '', PRIMARY KEY (`organizationId`,`warehouseId`,`taskId`,`taskId_Sequence`), KEY `I_TSK_TASKLISTS_BYDOCNO` (`organizationId`,`warehouseId`,`docNo`) )ENGINE=InnoDB; INSERT INTO tsk_tasklists(warehouseId,taskId,taskId_Sequence,docNo) SELECT t1.a, t2.a,t1.a*100+t2.a,t2.a % 2 FROM t AS t1, t AS t2; INSERT INTO tsk_tasklists(warehouseId,taskId,taskId_Sequence,docNo) SELECT 8, t2.a, t1.a*100+t2.a+10000,t2.a % 2 FROM t AS t1, t AS t2; ANALYZE TABLE tsk_tasklists; -- Table T choose index I_TSK_TASKLISTS_BYDOCNO EXPLAIN SELECT A.organizationId, A.warehouseId, A.orderNo FROM doc_order_header A LEFT JOIN tsk_tasklists T ON A.organizationId = T.organizationId AND A.warehouseId = T.warehouseId AND A.orderNo = T.docNo WHERE A.organizationId = 'NAIXUE' AND A.warehouseId = '8' AND A.waveNo = '2' AND (T.groupTaskId IS NULL OR T.groupTaskId = ''); prepare stmt1 from " EXPLAIN SELECT A.organizationId, A.warehouseId, A.orderNo FROM doc_order_header A LEFT JOIN tsk_tasklists T ON A.organizationId = T.organizationId AND A.warehouseId = T.warehouseId AND A.orderNo = T.docNo WHERE A.organizationId = 'NAIXUE' AND A.warehouseId = '8' AND A.waveNo = '2' AND (T.groupTaskId IS NULL OR T.groupTaskId = '')"; -- Table T choose primary, which is not the optimal index, much slower than choose index I_TSK_TASKLISTS_BYDOCNO EXECUTE stmt1; Suggested fix: MySQL 8.0.32 introduce this issue. Refer to following commit: https://github.com/mysql/mysql-server/commit/ddfe4e476ed6ecc7d76d79260a796bed9b3dde9f Due to Item_cond_and copy_andor_structure from Table_ref::m_join_cond in function get_optimizable_join_conditions, which result in JOIN_TAB::join_cond() is different with JOIN_TAB::table_ref::join_cond(). We should change `condition = t->join_cond()` TO `condition = t->join_cond_optim()`