Description:
When an index hint forces a secondary index scan, the optimizer may fail to recognize that the secondary index already provides the required ordering through InnoDB's implicit primary-key suffix. As a result, a query that should be executable as a grouped covering index lookup is degraded into a plan that builds a deduplicating temporary table.
How to repeat:
DROP TABLE IF EXISTS t3;
CREATE TABLE t3 (
a INT,
b INT NOT NULL,
c INT NOT NULL,
d INT NOT NULL,
e VARCHAR(200) NOT NULL,
PRIMARY KEY (a, b, c, d),
KEY idx_ab (a, b)
) ENGINE=INNODB;
INSERT INTO t3 VALUES
(1,1,1,1,'pad'),(1,1,2,1,'pad'),(1,1,3,1,'pad'),
(2,2,1,1,'pad'),(2,2,2,1,'pad'),(2,2,3,1,'pad'),(2,2,4,1,'pad'),
(3,3,1,1,'pad'),(3,3,2,1,'pad');
ANALYZE TABLE t3;
EXPLAIN FORMAT=TREE
SELECT c FROM t3 WHERE a = 2 AND b = 2 GROUP BY c\G
-> Group (no aggregates) (cost=1.05 rows=4)
-> Covering index lookup on t3 using idx_ab (a=2, b=2) (cost=0.65 rows=4)
-- The hinted query no longer reuses the index order and instead creates a
-- temporary table.
EXPLAIN FORMAT=TREE
SELECT /*+index(t3 idx_ab)*/ c FROM t3 WHERE a = 2 AND b = 2 GROUP BY c;
-> Table scan on <temporary> (cost=1.69..3.60 rows=4)
-> Temporary table with deduplication (cost=1.05..1.05 rows=4)
-> Covering index lookup on t3 using idx_ab (a=2, b=2) (cost=0.65 rows=4)
Suggested fix:
Update the optimizer's key-part collection logic so that constant key parts are still tracked for the clustered primary key even when the primary key is excluded from table->keys_in_use_for_query by an index or outline hint.
diff --git a/sql/sql_optimizer.cc b/sql/sql_optimizer.cc
index 844da8599ff..bfec4e00297 100644
--- a/sql/sql_optimizer.cc
+++ b/sql/sql_optimizer.cc
@@ -7869,22 +7869,56 @@ static bool add_key_part(Key_use_array *keyuse_array, Key_field *key_field) {
TABLE *const table = tl->table;
for (uint key = 0; key < table->s->keys; key++) {
- if (!(table->keys_in_use_for_query.is_set(key))) continue;
if (table->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
continue; // ToDo: ft-keys in non-ft queries. SerG
+ const bool key_usable = table->keys_in_use_for_query.is_set(key);
+ /*
+ The clustered primary key is a special case: even when an index hint
+ (REF_SCAN/RANGE_SCAN/INDEX_SCAN/FORCE INDEX/...) excludes it from
+ keys_in_use_for_query, the InnoDB storage layer still appends the
+ primary key to every secondary index's sort order. test_if_order_by_key()
+ falls through into the primary key parts when matching ORDER BY /
+ GROUP BY against a secondary index, and that fall-through reads
+ const_key_parts[primary_key] to skip parts already bound by equality
+ predicates. If we leave const_key_parts[primary_key] empty here, the
+ optimizer fails to recognize that, for example,
+ REF_SCAN(t idx_ab) on WHERE a = c1 AND b = c2 GROUP BY c
+ (with PRIMARY KEY (a, b, c, ...)) already produces rows ordered by c,
+ and forces an unnecessary deduplicating temporary table. We narrow the
+ fix to the primary key only: it is the single key that is always used
+ in physical plans regardless of whether it appears in
+ keys_in_use_for_query, so it is the only key for which const_key_parts
+ must be tracked even when the key is "disabled". Other disabled keys
+ are not consulted by the optimizer downstream and we keep the original
+ skipping behavior for them.
+ */
+ const bool is_pk_const_tracking =
+ !key_usable && key == table->s->primary_key &&
+ table->s->primary_key != MAX_KEY &&
+ (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX);
+ if (!key_usable && !is_pk_const_tracking) continue;
+
const uint key_parts = actual_key_parts(&table->key_info[key]);
for (uint part = 0; part < key_parts; part++) {
if (field->eq(table->key_info[key].key_part[part].field)) {
- const Key_use keyuse(tl, key_field->val,
- key_field->val->used_tables(), key, part,
- key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL,
- (key_part_map)1 << part,
- ~(ha_rows)0, // will be set in optimize_keyuse
- key_field->null_rejecting, key_field->cond_guard,
- key_field->sj_pred_no);
- if (keyuse_array->push_back(keyuse))
- return true; /* purecov: inspected */
+ if (key_usable) {
+ const Key_use keyuse(tl, key_field->val,
+ key_field->val->used_tables(), key, part,
+ key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL,
+ (key_part_map)1 << part,
+ ~(ha_rows)0, // will be set in optimize_keyuse
+ key_field->null_rejecting, key_field->cond_guard,
+ key_field->sj_pred_no);
+ if (keyuse_array->push_back(keyuse))
+ return true; /* purecov: inspected */
+ } else if (key_field->val->const_for_execution() &&
+ !(key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL)) {
+ // Primary key is "disabled" by hint but still implicitly drives
+ // the secondary-index sort order in InnoDB. Record the constant
+ // bit so test_if_order_by_key() can skip it via PK suffix.
+ table->const_key_parts[key] |= (key_part_map)1 << part;
+ }
}
}
}
Description: When an index hint forces a secondary index scan, the optimizer may fail to recognize that the secondary index already provides the required ordering through InnoDB's implicit primary-key suffix. As a result, a query that should be executable as a grouped covering index lookup is degraded into a plan that builds a deduplicating temporary table. How to repeat: DROP TABLE IF EXISTS t3; CREATE TABLE t3 ( a INT, b INT NOT NULL, c INT NOT NULL, d INT NOT NULL, e VARCHAR(200) NOT NULL, PRIMARY KEY (a, b, c, d), KEY idx_ab (a, b) ) ENGINE=INNODB; INSERT INTO t3 VALUES (1,1,1,1,'pad'),(1,1,2,1,'pad'),(1,1,3,1,'pad'), (2,2,1,1,'pad'),(2,2,2,1,'pad'),(2,2,3,1,'pad'),(2,2,4,1,'pad'), (3,3,1,1,'pad'),(3,3,2,1,'pad'); ANALYZE TABLE t3; EXPLAIN FORMAT=TREE SELECT c FROM t3 WHERE a = 2 AND b = 2 GROUP BY c\G -> Group (no aggregates) (cost=1.05 rows=4) -> Covering index lookup on t3 using idx_ab (a=2, b=2) (cost=0.65 rows=4) -- The hinted query no longer reuses the index order and instead creates a -- temporary table. EXPLAIN FORMAT=TREE SELECT /*+index(t3 idx_ab)*/ c FROM t3 WHERE a = 2 AND b = 2 GROUP BY c; -> Table scan on <temporary> (cost=1.69..3.60 rows=4) -> Temporary table with deduplication (cost=1.05..1.05 rows=4) -> Covering index lookup on t3 using idx_ab (a=2, b=2) (cost=0.65 rows=4) Suggested fix: Update the optimizer's key-part collection logic so that constant key parts are still tracked for the clustered primary key even when the primary key is excluded from table->keys_in_use_for_query by an index or outline hint. diff --git a/sql/sql_optimizer.cc b/sql/sql_optimizer.cc index 844da8599ff..bfec4e00297 100644 --- a/sql/sql_optimizer.cc +++ b/sql/sql_optimizer.cc @@ -7869,22 +7869,56 @@ static bool add_key_part(Key_use_array *keyuse_array, Key_field *key_field) { TABLE *const table = tl->table; for (uint key = 0; key < table->s->keys; key++) { - if (!(table->keys_in_use_for_query.is_set(key))) continue; if (table->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL)) continue; // ToDo: ft-keys in non-ft queries. SerG + const bool key_usable = table->keys_in_use_for_query.is_set(key); + /* + The clustered primary key is a special case: even when an index hint + (REF_SCAN/RANGE_SCAN/INDEX_SCAN/FORCE INDEX/...) excludes it from + keys_in_use_for_query, the InnoDB storage layer still appends the + primary key to every secondary index's sort order. test_if_order_by_key() + falls through into the primary key parts when matching ORDER BY / + GROUP BY against a secondary index, and that fall-through reads + const_key_parts[primary_key] to skip parts already bound by equality + predicates. If we leave const_key_parts[primary_key] empty here, the + optimizer fails to recognize that, for example, + REF_SCAN(t idx_ab) on WHERE a = c1 AND b = c2 GROUP BY c + (with PRIMARY KEY (a, b, c, ...)) already produces rows ordered by c, + and forces an unnecessary deduplicating temporary table. We narrow the + fix to the primary key only: it is the single key that is always used + in physical plans regardless of whether it appears in + keys_in_use_for_query, so it is the only key for which const_key_parts + must be tracked even when the key is "disabled". Other disabled keys + are not consulted by the optimizer downstream and we keep the original + skipping behavior for them. + */ + const bool is_pk_const_tracking = + !key_usable && key == table->s->primary_key && + table->s->primary_key != MAX_KEY && + (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX); + if (!key_usable && !is_pk_const_tracking) continue; + const uint key_parts = actual_key_parts(&table->key_info[key]); for (uint part = 0; part < key_parts; part++) { if (field->eq(table->key_info[key].key_part[part].field)) { - const Key_use keyuse(tl, key_field->val, - key_field->val->used_tables(), key, part, - key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL, - (key_part_map)1 << part, - ~(ha_rows)0, // will be set in optimize_keyuse - key_field->null_rejecting, key_field->cond_guard, - key_field->sj_pred_no); - if (keyuse_array->push_back(keyuse)) - return true; /* purecov: inspected */ + if (key_usable) { + const Key_use keyuse(tl, key_field->val, + key_field->val->used_tables(), key, part, + key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL, + (key_part_map)1 << part, + ~(ha_rows)0, // will be set in optimize_keyuse + key_field->null_rejecting, key_field->cond_guard, + key_field->sj_pred_no); + if (keyuse_array->push_back(keyuse)) + return true; /* purecov: inspected */ + } else if (key_field->val->const_for_execution() && + !(key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL)) { + // Primary key is "disabled" by hint but still implicitly drives + // the secondary-index sort order in InnoDB. Record the constant + // bit so test_if_order_by_key() can skip it via PK suffix. + table->const_key_parts[key] |= (key_part_map)1 << part; + } } } }